* Load the data *
* Start from an empty session, move to the replication folder, open the
* log, and load the 2018 polling-station data set.
clear
* NOTE: machine-specific path -- edit before running on another computer.
cd "C:\Users\wb513442\Downloads\JoP Data Replication Stage\Replication Files\Submission v2 Aug 2023" 
* Close any log left open by an interrupted earlier run; without this,
* -log using- stops with "log file already open".
capture log close
log using replication, replace smcl
* -use- takes -clear- (not -replace-) to discard whatever is in memory.
use data_replication_2018.dta, clear
* PART 1. MAIN RESULTS *

* Table 1. Lower Turnout for Females in Separate-Gender Polling Stations *
* Four specifications of female_turnout_ps on the female indicator, all
* restricted to assembly_cat==1 with standard errors clustered on id.
* Each -summarize- prints the outcome for female==0 in the estimation
* sample to the log; its result is not stored with the estimates.

eststo clear

* Column 1: full sample, no location fixed effects *
regress female_turnout_ps female if assembly_cat==1, vce(cluster id)
summarize female_turnout_ps if e(sample) & assembly_cat==1 & female==0
estadd local building "No"
eststo Model1
estimates store main_1_f

* Columns 2-4: id absorbed as a fixed effect *
*   2 = full sample, 3 = two_types==1, 4 = all_types==1
local restrict2 ""
local restrict3 "& two_types==1"
local restrict4 "& all_types==1"

forvalues col = 2/4 {
    areg female_turnout_ps female if assembly_cat==1 `restrict`col'', absorb(id) vce(cluster id)
    summarize female_turnout_ps if e(sample) & assembly_cat==1 & female==0 `restrict`col''
    estadd local building "Yes"
    eststo Model`col'
    estimates store main_`col'_f
}

* Export the four columns as a LaTeX (booktabs) table *
esttab Model1 Model2 Model3 Model4 using turnout_female.tex, ///
    label replace booktabs keep(female) ///
    b(3) se(3) sfmt(3) ///
    title(Female Turnout\label{tab:turnout_female}) ///
    scalars("r2_a Adjusted R\textsuperscript{2}" "building Location Fixed Effects") ///
    mtitle("" "" "" "") ///
    unstack collabels(none) order(female) ///
    star(* 0.10 ** 0.05 *** 0.01)
 
* PART 2. ONLINE APPENDIX *

* Online Appendix Table 1. Descriptive Statistics by PS Type *
* Writes Table_1_new.xlsx: one column per sample (B = Overall,
* C = Female-Only, D = Male-Only, E = Combined); for each of the six
* variables the mean goes in an even row (2-12) and the standard
* deviation in the odd row directly below it (3-13).
*
* The statistics are written straight from the -tabstat- result matrix.
* No helper variables are created, so the data in memory are untouched
* (no -preserve-/-restore- needed) and the values keep full double
* precision instead of being rounded through float variables.

putexcel set "Table_1_new.xlsx", sheet(Sheet1) replace
putexcel A1 = ("") B1 = ("Overall")  C1 = ("Female-Only") D1 = ("Male-Only") E1 = ("Combined")
putexcel A2 = ("Total Voters") A4 = ("Female Registered Voters") A6 = ("Female Votes Cast") A8 = ("Male Registered Voters") A10 = ("Male Votes Cast") A12 = ("Share of Valid Votes (\%)")  

local statvars total_voters female_voters female_votes male_voters male_votes valid_share

* Sample restriction for each spreadsheet column *
local cond_B "assembly_cat==1"
local cond_C "assembly_cat==1 & female==1"
local cond_D "assembly_cat==1 & male==1"
local cond_E "assembly_cat==1 & female==0 & male==0"

foreach col in B C D E {
    tabstat `statvars' if `cond_`col'', stat(mean sd) col(stat) save
    * r(StatTotal) is 2 x 6: row 1 = means, row 2 = sds, one column per variable.
    matrix desc_stat = r(StatTotal)
    * vec() stacks the columns: mean1, sd1, mean2, sd2, ... (12 x 1),
    * which is exactly the row layout of cells `col'2-`col'13.
    matrix desc_col = vec(desc_stat)
    putexcel `col'2 = matrix(desc_col)
}

* Online Appendix Table 2. Descriptive Statistics by PS Type for PL with All PS Types *
* Same layout as Online Appendix Table 1, restricted to all_types==1.
* Writes Table_2_new.xlsx: one column per sample (B = Overall,
* C = Female-Only, D = Male-Only, E = Combined); means in even rows
* (2-12), standard deviations in the odd row directly below (3-13).
*
* The statistics are written straight from the -tabstat- result matrix.
* No helper variables are created, so the data in memory are untouched
* (no -preserve-/-restore- needed) and the values keep full double
* precision instead of being rounded through float variables.

putexcel set "Table_2_new.xlsx", sheet(Sheet1) replace
putexcel A1 = ("") B1 = ("Overall")  C1 = ("Female-Only") D1 = ("Male-Only") E1 = ("Combined")
putexcel A2 = ("Total Voters") A4 = ("Female Registered Voters") A6 = ("Female Votes Cast") A8 = ("Male Registered Voters") A10 = ("Male Votes Cast") A12 = ("Share of Valid Votes (\%)")  

local statvars total_voters female_voters female_votes male_voters male_votes valid_share

* Sample restriction for each spreadsheet column *
local cond_B "assembly_cat==1 & all_types==1"
local cond_C "assembly_cat==1 & female==1 & all_types==1"
local cond_D "assembly_cat==1 & male==1 & all_types==1"
local cond_E "assembly_cat==1 & female==0 & male==0 & all_types==1"

foreach col in B C D E {
    tabstat `statvars' if `cond_`col'', stat(mean sd) col(stat) save
    * r(StatTotal) is 2 x 6: row 1 = means, row 2 = sds, one column per variable.
    matrix desc_stat = r(StatTotal)
    * vec() stacks the columns: mean1, sd1, mean2, sd2, ... (12 x 1),
    * which is exactly the row layout of cells `col'2-`col'13.
    matrix desc_col = vec(desc_stat)
    putexcel `col'2 = matrix(desc_col)
}

* Online Appendix Table 3. Share of Different Types of PS by Province *
* Province-by-province summary of the three share variables.

bysort province: summarize share_m share_f share_c

* Online Appendix Table 4. Share of Polling Locations by Type and Province *
* For each province, count the distinct id values within each cell and
* summarize that count for the assembly_cat==1 rows of interest.
* NOTE(review): nvals() is not an official egen function; presumably it
* comes from the user-written egenmore package -- confirm it is installed.

* Counts restricted by location type: num_pl (all_types), num_pl2 (two_types) *
foreach spec in "num_pl all_types" "num_pl2 two_types" {
    local countvar : word 1 of `spec'
    local typeflag : word 2 of `spec'
    bysort province assembly_cat `typeflag': egen `countvar' = nvals(id)
    by province: summarize `countvar' if assembly_cat==1 & `typeflag'==1
}

* Unrestricted count: every id within province x assembly_cat *
bysort province assembly_cat: egen num_pl3 = nvals(id)
by province: summarize num_pl3 if assembly_cat==1

* Online Appendix Table 5. Distribution of Polling Locations with Different Polling Stations *
* Builds a 4 x 4 grid of distinct-id counts. Variable n<r><c> holds the
* count for row r and column c:
*   rows    : n_comb equal to 0, 1, 2, or at least 3
*   columns : n_male and n_female both 0, or n_male+n_female equal to
*             1, 2, or at least 3
* Cell (1,1) is not computed. The counts are temporary, hence the
* -preserve-/-restore- pair.
*
* Fix: n42 previously used n_comb>3 while the rest of its row used
* n_comb>=3, which left out ids with exactly 3 combined stations and
* one separate-gender station.

preserve

local comb1 "n_comb==0"
local comb2 "n_comb==1"
local comb3 "n_comb==2"
local comb4 "n_comb>=3"

local sep1 "n_male==0 & n_female==0"
local sep2 "n_male+n_female==1"
local sep3 "n_male+n_female==2"
local sep4 "n_male+n_female>=3"

* Variables are created in the order n12 n13 n14 n21 ... n44 so that the
* variable ranges in -summarize- below resolve as intended.
forvalues r = 1/4 {
    forvalues c = 1/4 {
        if `r'==1 & `c'==1 {
            continue
        }
        egen n`r'`c' = nvals(id) if `comb`r'' & `sep`c''
    }
}

summ n12-n14 n21-n24 n31-n34 n41-n44
restore 

* Table 6. Lower Turnout for Males in Separate-Gender Polling Stations*
* Four specifications of male_turnout_ps on the male indicator, all
* restricted to assembly_cat==1 with standard errors clustered on id.
* Each -summarize- prints the outcome for male==0 in the estimation
* sample to the log; its result is not stored with the estimates.

eststo clear

* Column 1: full sample, no location fixed effects *
regress male_turnout_ps male if assembly_cat==1, vce(cluster id)
summarize male_turnout_ps if e(sample) & assembly_cat==1 & male==0
estadd local building "No"
eststo Model1
estimates store main_1_m

* Columns 2-4: id absorbed as a fixed effect *
*   2 = full sample, 3 = two_types==1, 4 = all_types==1
local restrict2 ""
local restrict3 "& two_types==1"
local restrict4 "& all_types==1"

forvalues col = 2/4 {
    areg male_turnout_ps male if assembly_cat==1 `restrict`col'', absorb(id) vce(cluster id)
    summarize male_turnout_ps if e(sample) & assembly_cat==1 & male==0 `restrict`col''
    estadd local building "Yes"
    eststo Model`col'
    estimates store main_`col'_m
}

* Export the four columns as a LaTeX (booktabs) table *
esttab Model1 Model2 Model3 Model4 using turnout_male.tex, ///
    label replace booktabs keep(male) ///
    b(3) se(3) sfmt(3) ///
    title(Male Turnout\label{tab:turnout_male}) ///
    scalars("r2_a Adjusted R\textsuperscript{2}" "building Location Fixed Effects") ///
    mtitle("" "" "" "") ///
    unstack collabels(none) order(male) ///
    star(* 0.10 ** 0.05 *** 0.01)

* PART 3: FIGURES * 

* Figure 1. Gender Gap in Turnout in Pakistan *

*C. DISTRIBUTIONS
* Overlaid kernel densities (bandwidth 0.1) of female and male turnout.
* Every option is kept inside the plot it was originally attached to.

twoway ///
    (kdensity female_turnout_ps, bw(0.1) ///
        xtitle("Turnout") ytitle("Density") ///
        xtick(#13) xlabel(0 (0.2) 1, format(%8.1f)) ///
        legend(label(1 "Female")) legend(label(2 "Male")) ///
        lpattern(solid) lwidth(medthick medthick)) ///
    (kdensity male_turnout_ps, bw(0.1) ///
        lpattern(shortdash) lcolor(orange*1.5))

graph export "dist_turnout.png", as(png) replace

* Figure 2. Combined Polling Stations Have Higher Turnout than Separate-Gender Ones *

*A. TURNOUT DISTRIBUTION FOR FEMALES AND MALES BY PS TYPE
* Two named panels (female, male), each comparing combined stations with
* the matching single-gender stations, then placed side by side.

* Left panel: female turnout *
twoway ///
    (kdensity female_turnout_ps if ps_gender_type=="Combined", bw(0.1) ///
        xtitle("Female Turnout") ytitle("Density") ///
        xtick(#13) xlabel(0 (0.2) 1, format(%8.1f)) ///
        legend(label(1 "Combined PS")) legend(label(2 "Female Only PS")) ///
        lpattern(solid) lwidth(medthick medthick)) ///
    (kdensity female_turnout_ps if ps_gender_type=="Female only", bw(0.1) ///
        lpattern(shortdash) lcolor(orange*1.5) ///
        name(female_turnout_pstype, replace))

* Right panel: male turnout; y axis switched off and panel width forced to 90 *
twoway ///
    (kdensity male_turnout_ps if ps_gender_type=="Combined", bw(0.1) ///
        xtitle("Male Turnout") ytitle("Density") ///
        xtick(#13) xlabel(0 (0.2) 1, format(%8.1f)) ///
        legend(label(1 "Combined PS")) legend(label(2 "Male Only PS")) ///
        lpattern(solid) lwidth(medthick medthick)) ///
    (kdensity male_turnout_ps if ps_gender_type=="Male only", bw(0.1) ///
        lpattern(shortdash) lcolor(orange*1.5) ///
        name(male_turnout_pstype, replace) yscale(off) fxsize(90))

* Combine the two panels on common axes *
graph combine female_turnout_pstype male_turnout_pstype, ///
    cols(2) iscale(1) xsize(8) xcommon ycommon

graph export "turnout_pstype.png", as(png) replace

* Online Appendix Figure 1. Scatter Plots of Constituency Characteristics against Share of Combined PS in a Constituency *
* Three scatter plots (assembly_cat==1 only) with n_comb_share_cons on the
* x axis; each one is exported to its own PNG file.

local xt "% Share of Combined PS in Constituency"

local y1  constituency_registered_voters_t
local yt1 "Total Number of Registered Voters in Constituency"
local f1  "totalregvoters_cons.png"

local y2  n_candidates
local yt2 "Total Number of Candidates in Constituency"
local f2  "totalcandidates_cons.png"

local y3  constituency_number_ps_total
local yt3 "Total Number of PS in Constituency"
local f3  "totalps_cons.png"

forvalues i = 1/3 {
    scatter `y`i'' n_comb_share_cons if assembly_cat==1, msize(tiny) ///
        xtitle("`xt'") ytitle("`yt`i''") ///
        ylabel(, labsize(small)) xlabel(, labsize(small))
    graph export "`f`i''", as(png) replace
}

* Online Appendix Figure 2. Scatter Plots of Constituency Characteristics against Share of Female-Only PS in a Constituency *
* Same three plots with n_f_share_cons on the x axis; the first plot uses
* the female registered-voter total instead of the overall total.

local xt "% Share of Female Only PS in Constituency"

local y1  constituency_registered_voters_f
local yt1 "Total Number of Registered Female Voters in Constituency"
local f1  "totalregvoters_consf.png"

local y2  n_candidates
local yt2 "Total Number of Candidates in Constituency"
local f2  "totalcandidates_consf.png"

local y3  constituency_number_ps_total
local yt3 "Total Number of PS in Constituency"
local f3  "totalps_consf.png"

forvalues i = 1/3 {
    scatter `y`i'' n_f_share_cons if assembly_cat==1, msize(tiny) ///
        xtitle("`xt'") ytitle("`yt`i''") ///
        ylabel(, labsize(small)) xlabel(, labsize(small))
    graph export "`f`i''", as(png) replace
}

* Everything below this point runs without being logged *
log close 

** Figures using other datasets

* Figure 1. Gender Gap in Turnout in Pakistan *

*A. WORLD VALUES SURVEY CROSS-SECTION (using WVS dataset)
* Replace the data in memory with the first sheet of vote_wvs.xlsx,
* taking variable names from the first row.
import excel "vote_wvs.xlsx", sheet("Sheet1") firstrow clear

* Combined "always" + "usually" shares by gender *
generate alw_usu_men   = always_men + usually_men
generate alw_usu_women = always_women + usually_women

* Female-minus-male gap in the "always" share; -separate- splits it into
* diff0 (A is not "Pakistan") and diff1 (A is "Pakistan") so the two
* groups can be drawn as differently styled bars.
generate diff = always_women - always_men
separate diff, by(A == "Pakistan")

* Shorten one long label before plotting *
replace A = "Trin. & Tobago" if A == "Trinidad and Tobago"

* Bars sorted by diff in descending order; the diff0 bars are drawn hollow *
graph bar diff0 diff1, ///
    over(A, sort(diff) descending lab(labsize(vsmall) ang(vertical))) ///
    nofill bar(1, bfcolor(none)) legend(off) ///
    ytitle("Difference between Female and Male Turnout", size(small)) ///
    ylabel(-0.2 (0.05) 0.1, format(%8.2f) labsize(small))

graph export "wvs_vote_bar.png", as(png) replace

*B. TURNOUT TREND (using summary data from FAFEN as in references)
* Replace the data in memory with the first sheet of
* turnout_trend_consistent.xlsx and plot female and male turnout by year
* as connected lines. Every option is kept inside the plot it was
* originally attached to.
import excel "turnout_trend_consistent.xlsx", sheet("Sheet1") firstrow clear

twoway ///
    (connected female_turnout year, ///
        ytitle("", axis(1)) xtitle("Year") ytitle("Turnout") ///
        xlabel(2008 (5) 2018) ylabel(0 (0.2) 1, format(%8.1f)) ///
        legend(label(1 "Female")) legend(label(2 "Male")) ///
        lwidth(medthick medthick) lpattern(solid)) ///
    (connected male_turnout year, lpattern(shortdash))

graph export "turnout_trend_consistent.png", as(png) replace

